#include <stdbool.h>
#include <stdint.h>
#include <stddef.h>

#include <irt.h>

#define NACL_CODE_BUNDLE_SIZE 32
#include <cpuinfo.h>
#include <x86/api.h>

/*
 * Probe bundle for the CMPXCHG16B check in cpuinfo_x86_nacl_detect_isa.
 * The probed instruction addresses memory as r15 + rdi and is preceded by MOV edi, edi;
 * the rest of the bundle is filled with HLT (0xF4).
 */
static const uint8_t cmpxchg16b_bundle[NACL_CODE_BUNDLE_SIZE] = {
	0x89, 0xFF, /* MOV edi, edi */
	0x49, 0x0F, 0xC7, 0x0C, 0x3F, /* CMPXCHG16B [r15 + rdi * 1] */
	0xF4, /* HLT filler: byte 7 */
	0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, /* HLT filler: bytes 8-15 */
	0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, /* HLT filler: bytes 16-23 */
	0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, /* HLT filler: bytes 24-31 */
};

/*
 * Probe bundle for the LZCNT check in cpuinfo_x86_nacl_detect_isa:
 * one LZCNT instruction, then HLT (0xF4) up to the end of the bundle.
 */
static const uint8_t lzcnt_bundle[NACL_CODE_BUNDLE_SIZE] = {
	0xF3, 0x0F, 0xBD, 0xC1, /* LZCNT eax, ecx */
	0xF4, 0xF4, 0xF4, 0xF4, /* HLT filler: bytes 4-7 */
	0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, /* HLT filler: bytes 8-15 */
	0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, /* HLT filler: bytes 16-23 */
	0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, /* HLT filler: bytes 24-31 */
};

/*
 * Probe bundle for the POPCNT check in cpuinfo_x86_nacl_detect_isa:
 * one POPCNT instruction, then HLT (0xF4) up to the end of the bundle.
 */
static const uint8_t popcnt_bundle[NACL_CODE_BUNDLE_SIZE] = {
	0xF3, 0x0F, 0xB8, 0xC1, /* POPCNT eax, ecx */
	0xF4, 0xF4, 0xF4, 0xF4, /* HLT filler: bytes 4-7 */
	0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, /* HLT filler: bytes 8-15 */
	0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, /* HLT filler: bytes 16-23 */
	0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, /* HLT filler: bytes 24-31 */
};

/*
 * Probe bundle for the MOVBE check in cpuinfo_x86_nacl_detect_isa.
 * The probed instruction loads from r15 + rcx and is preceded by MOV ecx, ecx;
 * the rest of the bundle is filled with HLT (0xF4).
 */
static const uint8_t movbe_bundle[NACL_CODE_BUNDLE_SIZE] = {
	0x89, 0xC9, /* MOV ecx, ecx */
	0x41, 0x0F, 0x38, 0xF0, 0x04, 0x0F, /* MOVBE eax, [r15 + rcx * 1] */
	0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, /* HLT filler: bytes 8-15 */
	0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, /* HLT filler: bytes 16-23 */
	0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, /* HLT filler: bytes 24-31 */
};

/*
 * Probe bundle for the BMI check in cpuinfo_x86_nacl_detect_isa:
 * one ANDN instruction, then HLT (0xF4) up to the end of the bundle.
 */
static const uint8_t bmi_bundle[NACL_CODE_BUNDLE_SIZE] = {
	0xC4, 0xE2, 0x70, 0xF2, 0xC2, /* ANDN eax, ecx, edx */
	0xF4, 0xF4, 0xF4, /* HLT filler: bytes 5-7 */
	0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, /* HLT filler: bytes 8-15 */
	0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, /* HLT filler: bytes 16-23 */
	0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, /* HLT filler: bytes 24-31 */
};

/*
 * Probe bundle for the TBM check in cpuinfo_x86_nacl_detect_isa:
 * one BLCS instruction, then HLT (0xF4) up to the end of the bundle.
 */
static const uint8_t tbm_bundle[NACL_CODE_BUNDLE_SIZE] = {
	0x8F, 0xE9, 0x78, 0x01, 0xD9, /* BLCS eax, ecx */
	0xF4, 0xF4, 0xF4, /* HLT filler: bytes 5-7 */
	0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, /* HLT filler: bytes 8-15 */
	0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, /* HLT filler: bytes 16-23 */
	0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, /* HLT filler: bytes 24-31 */
};

/*
 * Probe bundle for the 3dnow! check in cpuinfo_x86_nacl_detect_isa:
 * one PFADD instruction, then HLT (0xF4) up to the end of the bundle.
 */
static const uint8_t three_d_now_bundle[NACL_CODE_BUNDLE_SIZE] = {
	0x0F, 0x0F, 0xC1, 0x9E, /* PFADD mm0, mm1 */
	0xF4, 0xF4, 0xF4, 0xF4, /* HLT filler: bytes 4-7 */
	0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, /* HLT filler: bytes 8-15 */
	0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, /* HLT filler: bytes 16-23 */
	0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, /* HLT filler: bytes 24-31 */
};

/*
 * Probe bundle for the 3dnow!+ check in cpuinfo_x86_nacl_detect_isa:
 * one PFNACC instruction, then HLT (0xF4) up to the end of the bundle.
 */
static const uint8_t three_d_now_plus_bundle[NACL_CODE_BUNDLE_SIZE] = {
	0x0F, 0x0F, 0xC1, 0x8A, /* PFNACC mm0, mm1 */
	0xF4, 0xF4, 0xF4, 0xF4, /* HLT filler: bytes 4-7 */
	0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, /* HLT filler: bytes 8-15 */
	0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, /* HLT filler: bytes 16-23 */
	0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, /* HLT filler: bytes 24-31 */
};

/*
 * Probe bundle for the SSE3 check in cpuinfo_x86_nacl_detect_isa:
 * one HADDPS instruction, then HLT (0xF4) up to the end of the bundle.
 */
static const uint8_t sse3_bundle[NACL_CODE_BUNDLE_SIZE] = {
	0xF2, 0x0F, 0x7C, 0xC1, /* HADDPS xmm0, xmm1 */
	0xF4, 0xF4, 0xF4, 0xF4, /* HLT filler: bytes 4-7 */
	0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, /* HLT filler: bytes 8-15 */
	0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, /* HLT filler: bytes 16-23 */
	0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, /* HLT filler: bytes 24-31 */
};

/*
 * Probe bundle for the SSSE3 check in cpuinfo_x86_nacl_detect_isa:
 * one PSHUFB instruction, then HLT (0xF4) up to the end of the bundle.
 */
static const uint8_t ssse3_bundle[NACL_CODE_BUNDLE_SIZE] = {
	0x66, 0x0F, 0x38, 0x00, 0xC1, /* PSHUFB xmm0, xmm1 */
	0xF4, 0xF4, 0xF4, /* HLT filler: bytes 5-7 */
	0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, /* HLT filler: bytes 8-15 */
	0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, /* HLT filler: bytes 16-23 */
	0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, /* HLT filler: bytes 24-31 */
};

/*
 * Probe bundle for the SSE4.1 check in cpuinfo_x86_nacl_detect_isa:
 * one PMULLD instruction, then HLT (0xF4) up to the end of the bundle.
 */
static const uint8_t sse4_1_bundle[NACL_CODE_BUNDLE_SIZE] = {
	0x66, 0x0F, 0x38, 0x40, 0xC1, /* PMULLD xmm0, xmm1 */
	0xF4, 0xF4, 0xF4, /* HLT filler: bytes 5-7 */
	0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, /* HLT filler: bytes 8-15 */
	0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, /* HLT filler: bytes 16-23 */
	0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, /* HLT filler: bytes 24-31 */
};

/*
 * Probe bundle for the SSE4.2 check in cpuinfo_x86_nacl_detect_isa:
 * one PCMPGTQ instruction, then HLT (0xF4) up to the end of the bundle.
 */
static const uint8_t sse4_2_bundle[NACL_CODE_BUNDLE_SIZE] = {
	0x66, 0x0F, 0x38, 0x37, 0xC1, /* PCMPGTQ xmm0, xmm1 */
	0xF4, 0xF4, 0xF4, /* HLT filler: bytes 5-7 */
	0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, /* HLT filler: bytes 8-15 */
	0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, /* HLT filler: bytes 16-23 */
	0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, /* HLT filler: bytes 24-31 */
};

/*
 * Probe bundle for the SSE4A check in cpuinfo_x86_nacl_detect_isa:
 * one EXTRQ instruction, then HLT (0xF4) up to the end of the bundle.
 */
static const uint8_t sse4a_bundle[NACL_CODE_BUNDLE_SIZE] = {
	0x66, 0x0F, 0x79, 0xC1, /* EXTRQ xmm0, xmm1 */
	0xF4, 0xF4, 0xF4, 0xF4, /* HLT filler: bytes 4-7 */
	0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, /* HLT filler: bytes 8-15 */
	0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, /* HLT filler: bytes 16-23 */
	0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, /* HLT filler: bytes 24-31 */
};

/*
 * Probe bundle for the AES check in cpuinfo_x86_nacl_detect_isa:
 * one AESENC instruction, then HLT (0xF4) up to the end of the bundle.
 */
static const uint8_t aes_bundle[NACL_CODE_BUNDLE_SIZE] = {
	0x66, 0x0F, 0x38, 0xDC, 0xC1, /* AESENC xmm0, xmm1 */
	0xF4, 0xF4, 0xF4, /* HLT filler: bytes 5-7 */
	0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, /* HLT filler: bytes 8-15 */
	0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, /* HLT filler: bytes 16-23 */
	0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, /* HLT filler: bytes 24-31 */
};

/*
 * Probe bundle for the PCLMULQDQ check in cpuinfo_x86_nacl_detect_isa:
 * one PCLMULQDQ instruction, then HLT (0xF4) up to the end of the bundle.
 */
static const uint8_t pclmulqdq_bundle[NACL_CODE_BUNDLE_SIZE] = {
	0x66, 0x0F, 0x3A, 0x44, 0xC1, 0x00, /* PCLMULQDQ xmm0, xmm1, 0 */
	0xF4, 0xF4, /* HLT filler: bytes 6-7 */
	0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, /* HLT filler: bytes 8-15 */
	0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, /* HLT filler: bytes 16-23 */
	0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, /* HLT filler: bytes 24-31 */
};

/*
 * Probe bundle for the AVX check in cpuinfo_x86_nacl_detect_isa:
 * one VPERMILPS instruction, then HLT (0xF4) up to the end of the bundle.
 */
static const uint8_t avx_bundle[NACL_CODE_BUNDLE_SIZE] = {
	0xC4, 0xE3, 0x7D, 0x04, 0xC1, 0xAA, /* VPERMILPS ymm0, ymm1, 0xAA */
	0xF4, 0xF4, /* HLT filler: bytes 6-7 */
	0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, /* HLT filler: bytes 8-15 */
	0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, /* HLT filler: bytes 16-23 */
	0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, /* HLT filler: bytes 24-31 */
};

/*
 * Probe bundle for the FMA3 check in cpuinfo_x86_nacl_detect_isa:
 * one VFMADDSUB213PS instruction, then HLT (0xF4) up to the end of the bundle.
 */
static const uint8_t fma3_bundle[NACL_CODE_BUNDLE_SIZE] = {
	0xC4, 0xE2, 0x75, 0xA6, 0xC2, /* VFMADDSUB213PS ymm0, ymm1, ymm2 */
	0xF4, 0xF4, 0xF4, /* HLT filler: bytes 5-7 */
	0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, /* HLT filler: bytes 8-15 */
	0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, /* HLT filler: bytes 16-23 */
	0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, /* HLT filler: bytes 24-31 */
};

/*
 * Probe bundle for the FMA4 check in cpuinfo_x86_nacl_detect_isa:
 * one VFMADDPS instruction, then HLT (0xF4) up to the end of the bundle.
 */
static const uint8_t fma4_bundle[NACL_CODE_BUNDLE_SIZE] = {
	0xC4, 0xE3, 0xF5, 0x68, 0xC3, 0x20, /* VFMADDPS ymm0, ymm1, ymm2, ymm3 */
	0xF4, 0xF4, /* HLT filler: bytes 6-7 */
	0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, /* HLT filler: bytes 8-15 */
	0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, /* HLT filler: bytes 16-23 */
	0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, /* HLT filler: bytes 24-31 */
};

/*
 * Probe bundle for the XOP check in cpuinfo_x86_nacl_detect_isa:
 * one VPHADDBQ instruction, then HLT (0xF4) up to the end of the bundle.
 */
static const uint8_t xop_bundle[NACL_CODE_BUNDLE_SIZE] = {
	0x8F, 0xE9, 0x78, 0xC3, 0xC1, /* VPHADDBQ xmm0, xmm1 */
	0xF4, 0xF4, 0xF4, /* HLT filler: bytes 5-7 */
	0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, /* HLT filler: bytes 8-15 */
	0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, /* HLT filler: bytes 16-23 */
	0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, /* HLT filler: bytes 24-31 */
};

/*
 * Probe bundle for the F16C check in cpuinfo_x86_nacl_detect_isa:
 * one VCVTPH2PS instruction, then HLT (0xF4) up to the end of the bundle.
 */
static const uint8_t f16c_bundle[NACL_CODE_BUNDLE_SIZE] = {
	0xC4, 0xE2, 0x7D, 0x13, 0xC1, /* VCVTPH2PS ymm0, xmm1 */
	0xF4, 0xF4, 0xF4, /* HLT filler: bytes 5-7 */
	0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, /* HLT filler: bytes 8-15 */
	0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, /* HLT filler: bytes 16-23 */
	0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, /* HLT filler: bytes 24-31 */
};

/*
 * Probe bundle for the AVX2 check in cpuinfo_x86_nacl_detect_isa:
 * one VPERMPS instruction, then HLT (0xF4) up to the end of the bundle.
 */
static const uint8_t avx2_bundle[NACL_CODE_BUNDLE_SIZE] = {
	0xC4, 0xE2, 0x75, 0x16, 0xC2, /* VPERMPS ymm0, ymm1, ymm2 */
	0xF4, 0xF4, 0xF4, /* HLT filler: bytes 5-7 */
	0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, /* HLT filler: bytes 8-15 */
	0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, /* HLT filler: bytes 16-23 */
	0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, /* HLT filler: bytes 24-31 */
};


struct cpuinfo_x86_isa cpuinfo_x86_nacl_detect_isa(void) {
	/*
	 * Under Native Client sandbox we can't just ask the CPU:
	 * - First, some instructions (XGETBV) necessary to query AVX support are not white-listed in the validator.
	 * - Secondly, even if CPU supports some instruction, but validator doesn't know about it (e.g. due a bug in the
	 *   ISA detection in the validator), all instructions from the "unsupported" ISA extensions will be replaced by
	 *   HLTs when the module is loaded.
	 * Thus, instead of quering the CPU about supported ISA extensions, we query the validator: we pass bundles with
	 * instructions from ISA extensions to dynamic code generation APIs, and test if they are accepted.
	 */

	struct cpuinfo_x86_isa isa = { 0 };

	struct nacl_irt_code_data_alloc nacl_irt_code_data_alloc = { 0 };
	struct nacl_irt_dyncode nacl_irt_dyncode = { 0 };
	if (sizeof(nacl_irt_code_data_alloc) != nacl_interface_query(NACL_IRT_CODE_DATA_ALLOC_v0_1,
	                                                             &nacl_irt_code_data_alloc,
	                                                             sizeof(nacl_irt_code_data_alloc)))
	{
		goto finish;
	}

	if (sizeof(nacl_irt_dyncode) != nacl_interface_query(NACL_IRT_DYNCODE_v0_1,
	                                                     &nacl_irt_dyncode,
	                                                     sizeof(nacl_irt_dyncode)))
	{
		goto finish;
	}

	const size_t allocation_size = 65536;
	uintptr_t code_segment = 0;
	if (0 != nacl_irt_code_data_alloc.allocate_code_data(0, allocation_size, 0, 0, &code_segment))
	{
		goto finish;
	}

	isa.cmpxchg16b = !nacl_irt_dyncode.dyncode_create((void*) code_segment, cmpxchg16b_bundle, NACL_CODE_BUNDLE_SIZE) &&
		(*((const uint8_t*) code_segment) != 0xF4);
	code_segment += NACL_CODE_BUNDLE_SIZE;

	isa.lzcnt = !nacl_irt_dyncode.dyncode_create((void*) code_segment, lzcnt_bundle, NACL_CODE_BUNDLE_SIZE) &&
		(*((const uint8_t*) code_segment) != 0xF4);
	code_segment += NACL_CODE_BUNDLE_SIZE;

	isa.popcnt = !nacl_irt_dyncode.dyncode_create((void*) code_segment, popcnt_bundle, NACL_CODE_BUNDLE_SIZE) &&
		(*((const uint8_t*) code_segment) != 0xF4);
	code_segment += NACL_CODE_BUNDLE_SIZE;

	isa.movbe = !nacl_irt_dyncode.dyncode_create((void*) code_segment, movbe_bundle, NACL_CODE_BUNDLE_SIZE) &&
		(*((const uint8_t*) code_segment) != 0xF4);
	code_segment += NACL_CODE_BUNDLE_SIZE;

	isa.bmi = !nacl_irt_dyncode.dyncode_create((void*) code_segment, bmi_bundle, NACL_CODE_BUNDLE_SIZE) &&
		(*((const uint8_t*) code_segment) != 0xF4);
	code_segment += NACL_CODE_BUNDLE_SIZE;

	isa.tbm = !nacl_irt_dyncode.dyncode_create((void*) code_segment, tbm_bundle, NACL_CODE_BUNDLE_SIZE) &&
		(*((const uint8_t*) code_segment) != 0xF4);
	code_segment += NACL_CODE_BUNDLE_SIZE;

	isa.three_d_now = !nacl_irt_dyncode.dyncode_create((void*) code_segment, three_d_now_bundle, NACL_CODE_BUNDLE_SIZE) &&
		(*((const uint8_t*) code_segment) != 0xF4);
	code_segment += NACL_CODE_BUNDLE_SIZE;

	isa.three_d_now_plus =
		!nacl_irt_dyncode.dyncode_create((void*) code_segment, three_d_now_plus_bundle, NACL_CODE_BUNDLE_SIZE) &&
		(*((const uint8_t*) code_segment) != 0xF4);
	code_segment += NACL_CODE_BUNDLE_SIZE;

	isa.sse3 = !nacl_irt_dyncode.dyncode_create((void*) code_segment, sse3_bundle, NACL_CODE_BUNDLE_SIZE) &&
		(*((const uint8_t*) code_segment) != 0xF4);
	code_segment += NACL_CODE_BUNDLE_SIZE;

	isa.ssse3 = !nacl_irt_dyncode.dyncode_create((void*) code_segment, ssse3_bundle, NACL_CODE_BUNDLE_SIZE) &&
		(*((const uint8_t*) code_segment) != 0xF4);
	code_segment += NACL_CODE_BUNDLE_SIZE;

	isa.sse4_1 = !nacl_irt_dyncode.dyncode_create((void*) code_segment, sse4_1_bundle, NACL_CODE_BUNDLE_SIZE) &&
		(*((const uint8_t*) code_segment) != 0xF4);
	code_segment += NACL_CODE_BUNDLE_SIZE;

	isa.sse4_2 = !nacl_irt_dyncode.dyncode_create((void*) code_segment, sse4_2_bundle, NACL_CODE_BUNDLE_SIZE) &&
		(*((const uint8_t*) code_segment) != 0xF4);
	code_segment += NACL_CODE_BUNDLE_SIZE;

	isa.sse4a = !nacl_irt_dyncode.dyncode_create((void*) code_segment, sse4a_bundle, NACL_CODE_BUNDLE_SIZE) &&
		(*((const uint8_t*) code_segment) != 0xF4);
	code_segment += NACL_CODE_BUNDLE_SIZE;

	isa.aes = !nacl_irt_dyncode.dyncode_create((void*) code_segment, aes_bundle, NACL_CODE_BUNDLE_SIZE) &&
		(*((const uint8_t*) code_segment) != 0xF4);
	code_segment += NACL_CODE_BUNDLE_SIZE;

	isa.pclmulqdq = !nacl_irt_dyncode.dyncode_create((void*) code_segment, pclmulqdq_bundle, NACL_CODE_BUNDLE_SIZE) &&
		(*((const uint8_t*) code_segment) != 0xF4);
	code_segment += NACL_CODE_BUNDLE_SIZE;

	isa.avx = !nacl_irt_dyncode.dyncode_create((void*) code_segment, avx_bundle, NACL_CODE_BUNDLE_SIZE) &&
		(*((const uint8_t*) code_segment) != 0xF4);
	code_segment += NACL_CODE_BUNDLE_SIZE;

	isa.fma3 = !nacl_irt_dyncode.dyncode_create((void*) code_segment, fma3_bundle, NACL_CODE_BUNDLE_SIZE) &&
		(*((const uint8_t*) code_segment) != 0xF4);
	code_segment += NACL_CODE_BUNDLE_SIZE;

	isa.fma4 = !nacl_irt_dyncode.dyncode_create((void*) code_segment, fma4_bundle, NACL_CODE_BUNDLE_SIZE) &&
		(*((const uint8_t*) code_segment) != 0xF4);
	code_segment += NACL_CODE_BUNDLE_SIZE;

	isa.xop = !nacl_irt_dyncode.dyncode_create((void*) code_segment, xop_bundle, NACL_CODE_BUNDLE_SIZE) &&
		(*((const uint8_t*) code_segment) != 0xF4);
	code_segment += NACL_CODE_BUNDLE_SIZE;

	isa.f16c = !nacl_irt_dyncode.dyncode_create((void*) code_segment, f16c_bundle, NACL_CODE_BUNDLE_SIZE) &&
		(*((const uint8_t*) code_segment) != 0xF4);
	code_segment += NACL_CODE_BUNDLE_SIZE;

	isa.avx2 = !nacl_irt_dyncode.dyncode_create((void*) code_segment, avx2_bundle, NACL_CODE_BUNDLE_SIZE) &&
		(*((const uint8_t*) code_segment) != 0xF4);

finish:
	return isa;
}
